package spider_lib

// 基础包
import (
    "github.com/PuerkitoBio/goquery"                        //DOM解析
    "github.com/henrylee2cn/pholcus/app/downloader/request" //必需
    // "github.com/henrylee2cn/pholcus/logs"              //信息输出
    . "github.com/henrylee2cn/pholcus/app/spider" //必需
    // . "github.com/henrylee2cn/pholcus/app/spider/common" //选用
    // net包
    // "net/http" //设置http.Header
    // "net/url"
    // 编码包
    // "encoding/xml"
    // "encoding/json"
    // 字符串处理包
    // "regexp"
    // "strconv"
    // "strings"
    // 其他包
    // "fmt"
    // "math"
    // "time"
)

// init calls Kaola.Register when this package is loaded, so importing the
// package is enough to make the spider known to the framework.
// NOTE(review): Register is defined by the spider framework, not in this file;
// presumably it adds Kaola to the framework's list of runnable spiders — confirm.
func init() {
    Kaola.Register()
}

// Kaola is the spider for Kaola (www.kaola.com), an overseas-shopping site
// (site slogan: direct overseas sourcing, 7-day no-reason returns,
// worry-free after-sales service).
//
// The crawl has three stages, each implemented as a rule in the rule tree:
//
//  1. "获取版块URL": read the section links from the home page navigation.
//  2. "商品列表": read the product links from a section page.
//  3. "商品详情": extract one output record from a product page.
//
// The section name found in stage 1 travels with every request as the
// "goodsType" temp value and ends up in the final record.
var Kaola = &Spider{
    Name:        "考拉海淘",
    Description: "考拉海淘商品数据 [Auto Page] [www.kaola.com]",
    // Pausetime: 300,
    // Keyin:   KEYIN,
    // Limit:        LIMIT,
    EnableCookie: false,
    RuleTree: &RuleTree{
        // Root seeds the crawl with the site home page.
        Root: func(ctx *Context) {
            ctx.AddQueue(&request.Request{Url: "http://www.kaola.com", Rule: "获取版块URL"})
        },

        Trunk: map[string]*Rule{

            // Stage 1: queue one request per navigation link under #funcTab,
            // remembering the link text as the section name.
            "获取版块URL": {
                ParseFunc: func(ctx *Context) {
                    links := ctx.GetDom().Find("#funcTab li a")
                    links.Each(func(idx int, link *goquery.Selection) {
                        // The first navigation entry is deliberately skipped.
                        // NOTE(review): presumably it is the home page tab — confirm.
                        if idx == 0 {
                            return
                        }
                        href, found := link.Attr("href")
                        if !found {
                            return
                        }
                        ctx.AddQueue(&request.Request{
                            Url:  href,
                            Rule: "商品列表",
                            Temp: map[string]interface{}{"goodsType": link.Text()},
                        })
                    })
                },
            },

            // Stage 2: queue one detail request per .proinfo element on the
            // section page, forwarding the section name.
            "商品列表": {
                ParseFunc: func(ctx *Context) {
                    // Look the section name up once instead of once per product.
                    // The comma-ok form is used so that an unexpected non-string
                    // temp value yields an empty section name instead of a panic
                    // inside the crawler callback.
                    goodsType, _ := ctx.GetTemp("goodsType", "").(string)

                    ctx.GetDom().Find(".proinfo").Each(func(_ int, item *goquery.Selection) {
                        href, found := item.Find("a").Attr("href")
                        if !found {
                            return
                        }
                        ctx.AddQueue(&request.Request{
                            // The href is appended to the site origin, i.e. it is
                            // treated as a site-relative path.
                            Url:  "http://www.kaola.com" + href,
                            Rule: "商品详情",
                            Temp: map[string]interface{}{"goodsType": goodsType},
                        })
                    })
                },
            },

            // Stage 3: extract the product fields and emit one record.
            "商品详情": {
                // Note: the declared fields and the data actually output must
                // stay consistent with each other. The integer keys passed to
                // ctx.Output below line up with the positions in this list.
                ItemFields: []string{
                    "标题",
                    "价格",
                    "品牌",
                    "采购地",
                    "评论数",
                    "类别",
                },
                ParseFunc: func(ctx *Context) {
                    dom := ctx.GetDom()

                    // Title.
                    title := dom.Find(".product-title").Text()

                    // Price.
                    price := dom.Find("#js_currentPrice span").Text()

                    // Brand and place of purchase are the first and second
                    // entries of the same parameter list, so query it once.
                    params := dom.Find(".goods_parameter li")
                    brand := params.Eq(0).Text()
                    from := params.Eq(1).Text()

                    // Number of comments.
                    discussNum := dom.Find("#commentCounts").Text()

                    // Hand the record to the framework for output.
                    ctx.Output(map[int]interface{}{
                        0: title,
                        1: price,
                        2: brand,
                        3: from,
                        4: discussNum,
                        5: ctx.GetTemp("goodsType", ""),
                    })
                },
            },
        },
    },
}
